from langchain_community.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader

# Build and persist a FAISS vector store from the real-estate sales data file.
#
# Pipeline: load raw text -> split into chunks on numbered-item boundaries
# (e.g. "1.", "23.") -> embed each chunk with OpenAI -> save the FAISS index
# to disk for later similarity-search retrieval.

# Instantiate the document loader for the raw sales-data file.
loader = TextLoader("real_estate_sales_data.txt")
documents = loader.load()

# Split on numbered-list markers ("1.", "42.", ...) via a regex separator.
# chunk_overlap=0 keeps each numbered entry as an independent chunk.
text_splitter = CharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
    separator=r'\d+\.',
    is_separator_regex=True,
)
docs = text_splitter.split_documents(documents)
print(docs)

# OpenAI embedding model (reads API credentials from the environment by default).
embeddings = OpenAIEmbeddings()

# FAISS vector database, initialized with the embeddings of `docs`.
db = FAISS.from_documents(docs, embeddings)

# NOTE(review): the index directory "real_estates_sale" does not match the
# input file's "real_estate_sales" naming — confirm downstream loaders use
# this exact path before renaming.
db.save_local("real_estates_sale")